#!/usr/bin/env python
# encoding: utf-8
"""
mti.py

Handles output from MTI; turns an MTI file into a nice set of chunked
linelists (see nlm_output.py).

The line parsers are quite intolerant, and throw exceptions on all malformed
lines. The document parser tolerates the ones that aren't likely to impact
parsing quality (i.e. no line id, because it means that there was an error
message in the code or something similar).

Created by Jorge Herskovic on 2008-05-27.
Copyright (c) 2008 Jorge Herskovic. All rights reserved.
"""
from MEDRank.file.chunkmap import FakeChunkmap
from MEDRank.utility.logger import logging, ULTRADEBUG
from MEDRank.file.nlm_output import (Line, ChunkedNLMOutput,
                                     CUINotFoundError, ParsingError,
                                     NoConfidenceError,
                                     NoLineIDError)

# Default value of the `lines_to_ignore` argument of MtiOutput, which hands
# it on to ChunkedNLMOutput unchanged.
# NOTE(review): how these strings are matched against input lines (exact,
# prefix or substring) is decided by ChunkedNLMOutput -- confirm there.
DEFAULT_LINES_TO_IGNORE = [
    "------",
    "*** error ***",
]

class MtiLine(Line):
    """A single, already-split line of MTI output.

    The constructor reads these positions of `self.split_line`:

        1 -> description (also exposed as `heading`)
        2 -> CUI
        3 -> confidence, stored divided by 1000.0
        4 -> type, stored upper-cased (exposed as `heading_type`)
        7 -> source

    Position 0 is passed to Line as `id_position`.
    NOTE(review): `split_line`, `_line`, `line_id` and `confidence` are
    presumably provided by Line.__init__ -- confirm in nlm_output.py.
    """
    __slots__ = ['_cui', '_description', '_source', '_type']

    def __init__(self, original_line):
        """Parse `original_line` into an MtiLine.

        Raises:
            CUINotFoundError: the CUI field is absent or empty.
            ParsingError: the description, type or source field is absent.
            NoConfidenceError: the confidence field is not a valid number.
        """
        Line.__init__(self, original_line, id_position=0)
        fields = self.split_line

        # A missing CUI field and an empty one are reported the same way.
        try:
            cui = fields[2]
        except IndexError:
            cui = ''
        if cui == '':
            raise CUINotFoundError("There was no CUI in the line '%s'" %
                                   self._line)
        self._cui = cui

        try:
            description, kind, source = fields[1], fields[4], fields[7]
        except IndexError:
            raise ParsingError("Data missing from line '%s'" % self._line)
        self._description = description
        self._source = source
        self._type = kind.upper()

        # An unparseable confidence is an error for this line; no fallback
        # value is substituted here.
        try:
            self.confidence = float(fields[3]) / 1000.0
        except ValueError:
            raise NoConfidenceError("Could not parse a confidence value in "
                                    "line '%s'" % self._line)

        logging.log(ULTRADEBUG, "Created a MtiLine @ %d: %s (%s) %1.3f",
                    self.line_id, self._cui, self._description,
                    self.confidence)

    # Read-only accessors. The *_fget functions remain plain methods as well.
    def cui_fget(self):
        "Return the CUI read from this line."
        return self._cui
    CUI = property(cui_fget)

    def description_fget(self):
        "Return the description read from this line."
        return self._description
    description = property(description_fget)

    def source_fget(self):
        "Return the source read from this line."
        return self._source
    source = property(source_fget)

    def heading_fget(self):
        "Return the heading, which is the same value as the description."
        return self.description
    heading = property(heading_fget)

    def type_fget(self):
        "Return the upper-cased type read from this line."
        return self._type
    heading_type = property(type_fget)
    
class MtiOutput(ChunkedNLMOutput):
    """Represents an MTI output file.

    A ChunkedNLMOutput whose lines are parsed as MtiLine objects and which
    tells the parser to skip lines on which no CUI could be found.
    """
    # NOTE(review): both defaults are evaluated once, when the class is
    # defined, so instances built without explicit arguments share the same
    # DEFAULT_LINES_TO_IGNORE list and the same FakeChunkmap object. That is
    # only safe if neither is mutated -- confirm against ChunkedNLMOutput.
    def __init__(self, fileobject, lines_to_ignore=DEFAULT_LINES_TO_IGNORE,
                 chunkmap=FakeChunkmap()):
        """Set up the reader.

        Args:
            fileobject: passed through to ChunkedNLMOutput unchanged.
            lines_to_ignore: passed through to ChunkedNLMOutput unchanged;
                defaults to DEFAULT_LINES_TO_IGNORE.
            chunkmap: passed through to ChunkedNLMOutput unchanged; defaults
                to a FakeChunkmap.
        """
        ChunkedNLMOutput.__init__(self, type_of_lines=MtiLine,
                                  fileobject=fileobject,
                                  lines_to_ignore=lines_to_ignore,
                                  chunkmap=chunkmap)

    def ignore_exception(self, which_exception, on_which_line):
        """Checks whether an exception generated by the parser is actionable
        or if it should be ignored. We ignore CUINotFoundError, because it is
        impossible to do anything in MEDRank without a CUI.

        Args:
            which_exception: the exception instance raised while parsing.
            on_which_line: the offending line; only used in the log message.

        Returns:
            True if the exception is a CUINotFoundError (or a subclass of
            it), False otherwise.
        """
        # isinstance() also accepts subclasses of CUINotFoundError, which an
        # exact type() comparison would have rejected.
        if isinstance(which_exception, CUINotFoundError):
            # The line is handed to the logger as an argument so the message
            # is only formatted when ULTRADEBUG is enabled, and a tuple value
            # cannot break the '%' formatting.
            logging.log(ULTRADEBUG,
                        "Skipping line '%s' because no CUI could be found "
                        "on it", on_which_line)
            return True
        return False


